getwd()
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## Packages
set.seed(717)
library(cubature)
library(lava) 
library(ggplot2)
library(mcGlobaloptim) 
library(DiceKriging) 
library(nloptr) 
library(MASS)
library(mcmc)
library(geoR)
library(RobustCalibration)
library(lhs)
library(RobustGaSP)
library(numDeriv)
library(ggplot2)
library(rTensor)
library(plotly)
library(R.matlab)
library(proxy)
library(abind)
library(scatterplot3d)
library(Matrix)
library(purrr)
library(plyr)
library(multiway)

# tempdir()
# dir.create(tempdir())
load("data4.RData")
load("c.pre.to.list.3.RData")

data.x <- data4$x0[2996:3360,]
data.y <- data4$y0[,,2996:3360]

dim.f = dim(data.y)[1:2]
d = ncol(data.x); lower.x = apply(data.x,2,min); upper.x = apply(data.x,2,max)

dim.s = c(2,2)
dim.h = prod(dim.s); dim.mode = length(dim.s)

N = nrow(data.x)
x0 = data.x; y0 = data.y

h <- function(y) sum(y)

ind.x.star = which.max(apply(y0,3,h))
h.star = apply(y0,3,h)[ind.x.star]
full.x.star = x0[ind.x.star,]

ind.x.star; h.star; full.x.star


## Kernel
norm0 <- function(x1,x2) as.matrix(dist(x1,x2,method = "Euclidean"))
norm1 <- function(x1,x2){
  nor = list()
  for(i in 1:d){
    nor[[i]] = norm0(x1[,i],x2[,i])}
  return(nor)
} 

mat0 <- function(x) matern(x, phi=1, kappa=5/2)
gau0 <- function(x) exp(-x^2)
exp0 <- function(x) exp(-abs(x))

ker.sele <- function(x1,x2,theta,ker){
  x = norm1(x1,x2)
  dis = Map(function(x0,th) x0/th,x,theta)
  R0 = Map(function(x0) ker(x0),dis)
  R = Reduce("*", R0)
  return(R)
}

noise <- function(x1,x2){
  no.ind = which(apply(x1, 1, function(row_a) apply(x2, 1, function(row_b) all(row_a == row_b))), arr.ind = TRUE)
  no.0 = matrix(0,nrow(x2),nrow(x1))
  no.0[no.ind] = 1
  return(t(no.0))
}

B.tuck.mt <- function(y,n){
  y_0 = y
  
  tuck.re = multiway::tucker(y_0, nfac = c(dim.s,n), nstart = 1, Cfixed = diag(n))
  U.tuck = list(tuck.re$B,tuck.re$A)
  core.ten = tuck.re$G
  
  return(list(core.ten=core.ten, U.tuck=U.tuck))
}

B.tuck.mt <- function(y,n){
  y0 = lapply(1:n, function(i) rTensor::as.tensor(y[,,i]))
  U.tuck = list()
  
  mode.unfold <- function(da,k) do.call(cbind, lapply(da, function(x) {k_unfold(x,k)@data}))
  for(i in 1:dim.mode) U.tuck[[i]] = svd(mode.unfold(y0, i))$u[, 1:dim.s[i]]
  core.ten = lapply(y0, function(x) ttl(x, lapply(U.tuck, t), 1:dim.mode))
  
  return(list(core.ten=core.ten, U.tuck=U.tuck))
}



################################################################################
#### GP ########################################################################
################################################################################

################################################################################
## Our proposed method: NS-mtGP
vec.lab = list()
vec.lab[[1]] = dim.h*(dim.h+1)/2
vec.lab[[2]] = d
vec.lab[[3]] = vec.lab[[4]] = 1
group.lab <- unlist(Map(rep, LETTERS[1:length(vec.lab)], unlist(vec.lab)))
dim.hyper.mt = length(group.lab)

lower.th = c(unlist(Map(rep, c(1e-3,1e-3,1e-2,1e-10), unlist(vec.lab))))
upper.th = c(unlist(Map(rep, c(1,10,10,1e-2), unlist(vec.lab))))

sig.mt <- function(t,ome){
  O = matrix(0,t,t)
  O[lower.tri(O, diag = TRUE)] <- ome
  return(O)
}

likeli.mt <- function(x1,x2,y,n,t0,the,B.tuck){
  the0 = split(the, group.lab)
  
  k.ini = the0[[3]]*ker.sele(x1,x2,the0[[2]],mat0)+the0[[4]]*noise(x1,x2)
  omega = sig.mt(t0,the0[[1]])%*%t(sig.mt(t0,the0[[1]]))
  k.y = kronecker(k.ini,omega)
  
  log.likeli = determinant(k.y,logarithm=TRUE)$modulus+
    t(c(y))%*%solve(k.y)%*%c(y)
  return(list(like=log.likeli, the0=the0))
}
# likeli.mt(x,x,y0,n,dim.h,runif(dim.hyper.mt))


mtgp.hat <- function(x.new,x,y,n,n.test,t0,hy,B.tuck){
  x.new = matrix(x.new,n.test,d)
  omega = sig.mt(t0,hy[[1]])%*%t(sig.mt(t0,hy[[1]]))
  
  k.mt.s0 = hy[[3]]*ker.sele(x,x,hy[[2]],mat0)+hy[[4]]*noise(x,x)
  k.y = kronecker(k.mt.s0,omega)
  
  k.mt.10 = hy[[3]]*ker.sele(x.new,x,hy[[2]],mat0)+hy[[4]]*noise(x.new,x)
  k.mt.1 = kronecker(k.mt.10,omega)
  
  k.mt.00 = hy[[3]]*ker.sele(x.new,x.new,hy[[2]],mat0)+hy[[4]]*noise(x.new,x.new)
  k.mt.0 = kronecker(k.mt.00,omega)
  
  #based on tucker
  k.oth.tuck = k.mt.1%*%solve(k.y)
  
  f.hat.tuck = k.oth.tuck%*%c(y)
  var.hat.tuck = hy[[2]][1]*(k.mt.0-k.oth.tuck%*%t(k.mt.1))
  
  result = list(mean.tuck = f.hat.tuck, cov.tuck = var.hat.tuck)
  return(result)
}


EIJ <- function(i,j){
  E0 = matrix(0,dim.h,dim.h); E0[i,j] = 1
  return(E0)
}

der.l <- function(x1,x2,y,n,the,B.tuck){
  the0 = split(the, group.lab)
  ome = the0[[1]]; th = the0[[2]]; sig2 = the0[[3]]; tau2 = the0[[4]]
  
  J <- function(i){
    E0 = matrix(0,dim.h,dim.h); E0[i,i] = exp(sig.mt(dim.h,the0[[1]])[i,i])
    return(E0)
  }
  
  k.ini0 = ker.sele(x1,x2,the0[[2]],mat0)
  k.ini = the0[[3]]*k.ini0+the0[[4]]*noise(x1,x2)
  omega = sig.mt(dim.h,the0[[1]])%*%t(sig.mt(dim.h,the0[[1]]))
  sol.o = ginv(omega)
  
  k.y = kronecker(k.ini,omega); sol.k.y = ginv(k.y)

  al.k = sol.k.y%*%c(y)
  al.k1 = kronecker(k.ini0,omega)
  al.k2 = kronecker(diag(n),omega)
  der.l.sig = tr(sol.k.y%*%al.k1)-t(al.k)%*%al.k1%*%al.k
  der.l.tau2 = tr(sol.k.y%*%al.k2)-t(al.k)%*%al.k2%*%al.k
  
  der.th = array(jacobian(function(theta) ker.sele(x1,x2,theta,mat0), th),dim=c(n,n,d))
  der.l.th.i <- function(der) sig2*(tr(sol.k.y%*%kronecker(der,omega))-
                                      t(al.k)%*%kronecker(der,omega)%*%al.k)
  der.l.th = apply(der.th,3,der.l.th.i)
  
  
  der.l.phi.ij <- function(i,j) as.numeric(tr(sol.k.y%*%kronecker(k.ini,(EIJ(i,j)%*%t(omega)+omega%*%EIJ(j,i))))-
                                             t(al.k)%*%kronecker(k.ini,(EIJ(i,j)%*%t(omega)+omega%*%EIJ(j,i)))%*%al.k)
  der.l.phi.1 = sapply(c(1:dim.h), function(i) {
    sapply(c(1:i), function(j) der.l.phi.ij(i, j))
  })
  
  der.l.phi.ii <- function(i) tr(sol.k.y%*%kronecker(k.ini,(J(i)%*%t(omega)+omega%*%J(i))))-
    t(al.k)%*%kronecker(k.ini,(J(i)%*%t(omega)+omega%*%J(i)))%*%al.k
  der.l.phi.dig = apply(as.matrix(c(1:dim.h)),1, der.l.phi.ii)
  
  der.l.phi = matrix(0, dim.h, dim.h)
  for (i in 1:dim.h) {
    der.l.phi[i, 1:i] <- der.l.phi.1[[i]]
  }
  diag(der.l.phi) = der.l.phi.dig
  
  result = list(der.l.phi=der.l.phi[lower.tri(der.l.phi, diag = TRUE)], der.l.th=der.l.th, der.l.sig=der.l.sig, der.l.tau2=der.l.tau2)
  return(result)
}
# der.l(x,x,y0.tuck,k.ind,n,runif(dim.hyper.mt))



n.train = pre.to.list$n.train; n.test = pre.to.list$n.test
ind.train = pre.to.list$ind.train; ind.test = pre.to.list$ind.test

x.train = pre.to.list$x.train; y.train = pre.to.list$y.train
x.test = pre.to.list$x.test; y.test = pre.to.list$y.test

B.tuck.train = pre.to.list$B.tuck.train
y0.train = pre.to.list$y0.train

hyper.mt.train.ini = directL(function(the) likeli.mt(x.train,x.train,y0.train,n.train,dim.h,the,B.tuck.train)$like,
                             lower.th,upper.th,control=list(maxeval=1000))$par
hyper.mt.train = optim(par = hyper.mt.train.ini,
                       fn = function(the) likeli.mt(x.train,x.train,y0.train,n.train,dim.h,the,B.tuck.train)$like,
                       gr = function(the) unlist(der.l(x.train,x.train,y0.train,n.train,the,B.tuck.train)),
                       method = "L-BFGS-B", lower=lower.th, upper=upper.th)$par

likeli.train = -0.5*likeli.mt(x.train,x.train,y0.train,n.train,dim.h,hyper.mt.train,B.tuck.train)$like
hy.mt.train = likeli.mt(x.train,x.train,y0.train,n.train,dim.h,hyper.mt.train,B.tuck.train)$the0

pre.mt.train = mtgp.hat(x.train,x.train,y0.train,n.train,n.train,dim.h,hy.mt.train,B.tuck.train)

mean.tuck.mt.train = pre.mt.train$mean.tuck
var.tuck.hat.mt.train = pre.mt.train$cov.tuck

mean(abs((mean.tuck.mt.train-c(y0.train))/c(y0.train)))
sqrt(Matrix::norm(var.tuck.hat.mt.train,type="2"))

pre.mt.test = mtgp.hat(x.test,x.train,y0.train,n.train,n.test,dim.h,hy.mt.train,B.tuck.train)
mean.mt.test = pre.mt.test$mean.tuck
var.hat.mt.test = pre.mt.test$cov.tuck

B.test = kronecker(diag(n.test),B.tuck.train)
mse.mt = mean((B.test%*%mean.mt.test-c(y.test))^2)
cov.op = sqrt(Matrix::norm(B.test%*%var.hat.mt.test%*%t(B.test),type="2"))
mse.mt; cov.op

pre.mt.list = list(n.train=n.train, n.test=n.test, ind.train=ind.train, ind.test=ind.test,
                   x.train=x.train, y.train=y.train, x.test=x.test, y.test=y.test,
                   B.tuck.train=B.tuck.train, y0.train=y0.train,
                   nll.mt=likeli.train, mse.mt=mse.mt, cov.op=cov.op)
save(pre.mt.list, file="c.pre.mt.list.RData")



################################################################################
## Our proposed method: NS-mtGP-UCB
n = c.ini.set$n; m = c.ini.set$m; J.for = c.ini.set$J.for

like.re.mt = hyper.mt = fhat = lapply(1:J.for, function(x) list())
x0.mt = y0.mt = y0.tuck.mt = ind.x.mt = list()
mtgp.bo = h.mt = list()
regret.mt = ins.regret.mt = cum.regret.mt = list()
beta.mt = ucb.new.mt = lapply(1:J.for, function(x) list())


for(j.for.mt in 1:J.for){
  
  ind.x = c.ini.set$ind.x.for[[j.for.mt]]
  x = data.x[ind.x,]; y=y0[,,ind.x]

  y0.tuck = B.tuck.mt(y,n); B.tuck = Reduce(kronecker,y0.tuck$U.tuck)
  y0.0 = lapply(y0.tuck$core.ten, function(c) c@data)
  y.tuck = abind(y0.0, along = dim.mode+1)
  

  ## Setting
  ######################################## BO ####################################
  hyper.mt.old = directL(function(the) likeli.mt(x,x,y.tuck,n,dim.h,the,B.tuck)$like,lower.th,upper.th,control=list(maxeval=1000))$par
  hyper.mt.new = optim(par = hyper.mt.old, 
                       fn = function(the) likeli.mt(x,x,y.tuck,n,dim.h,the,B.tuck)$like,
                       gr = function(the) unlist(der.l(x,x,y.tuck,n,the,B.tuck)), 
                       method = "L-BFGS-B", lower=lower.th, upper=upper.th)$par
  
  like.re.mt[[j.for.mt]][[1]] = likeli.mt(x,x,y.tuck,n,dim.h,hyper.mt.new,B.tuck)
  hyper.mt[[j.for.mt]][[1]] = like.re.mt[[j.for.mt]][[1]]$the0
  
  
  x0 = data.x; inx0 = c(1:N)
  x0.mt[[j.for.mt]] = x; y0.mt[[j.for.mt]] = y; ind.x.mt[[j.for.mt]] = ind.x
  y0.tuck.mt[[j.for.mt]] = y.tuck; n.mt = n
  
  x.new.mt = t(as.matrix(x[which.max(apply(y,3,h)),]))
  y.new.mt = y[,,which.max(apply(y,3,h))]
  
  hyper.mt.ucb = unlist(hyper.mt[[j.for.mt]][[1]]); delta.mt = 0.05
  fhat[[j.for.mt]][[1]] = mtgp.hat(x.new.mt,x0.mt[[j.for.mt]],y0.tuck.mt[[j.for.mt]],
                                   n.mt,1,dim.h,hyper.mt[[j.for.mt]][[1]],B.tuck)
  
  for(i.mt in 1:m){
    sig2 = hyper.mt[[j.for.mt]][[i.mt]][[2]][2]
    eta = hyper.mt[[j.for.mt]][[i.mt]][[2]][2]
    it1 = Map(function(A) determinant(diag(dim.h)+1/eta*A[[2]],logarithm=TRUE)$modulus,fhat[[j.for.mt]])
    beta.mt[[j.for.mt]][[i.mt]] = sqrt(Matrix::norm(B.tuck%*%fhat[[j.for.mt]][[1]]$mean.tuck,type="2"))+
      sqrt(sig2/eta)*sqrt(2*log(1/delta.mt)+Reduce(sum,it1))
    
    ucb.mt <- function(x.new,beta){
      x.new = matrix(x.new,1,d)
      mtgp.output = mtgp.hat(x.new,x0.mt[[j.for.mt]],y0.tuck.mt[[j.for.mt]],
                             n.mt,n.test=1,dim.h,hyper.mt[[j.for.mt]][[i.mt]],B.tuck)
      ucb = sum(B.tuck%*%mtgp.output$mean.tuck)+beta*sqrt(Matrix::norm(mtgp.output$cov.tuck,type="2"))
      return(ucb)
    }
    
    ucb.x = apply(x0[-ind.x.mt[[j.for.mt]],],1,function(x.new) -ucb.mt(x.new,beta.mt[[j.for.mt]][[i.mt]]))
    ind.x.new.mt = inx0[-ind.x.mt[[j.for.mt]]][which.min(ucb.x)] #which(apply(x0, 1, function(row) all(row == x0[-ind.x.mt[[j.for.mt]],][which.min(ucb.x),])))[1]
    x.new.mt = x0[ind.x.new.mt,]; y.new.mt = y0[,,ind.x.new.mt]
    y.tuck.new.mt = abind(ttl(rTensor::as.tensor(y.new.mt), 
                              lapply(y0.tuck$U.tuck,t), 1:dim.mode)@data, along=dim.mode+1)
    
    ucb.new.mt[[j.for.mt]][[i.mt]] = ucb.mt(x.new.mt,beta.mt[[j.for.mt]][[i.mt]])
    fhat[[j.for.mt]][[i.mt+1]] = mtgp.hat(x.new.mt,x0.mt[[j.for.mt]],y0.tuck.mt[[j.for.mt]],
                                          n.mt,n.test=1,dim.h,hyper.mt[[j.for.mt]][[i.mt]],B.tuck)
  
    x0.mt[[j.for.mt]] = rbind(x0.mt[[j.for.mt]], x.new.mt)
    y0.mt[[j.for.mt]] = abind(y0.mt[[j.for.mt]], y.new.mt, along = dim.mode+1)
    y0.tuck.mt[[j.for.mt]] = abind(y0.tuck.mt[[j.for.mt]], y.tuck.new.mt, along = dim.mode+1)
    ind.x.mt[[j.for.mt]] = c(ind.x.mt[[j.for.mt]],ind.x.new.mt)
    n.mt = n+i.mt
    
    if(i.mt %% 150 == 0){
      hyper.mt.ucb = optim(par = unlist(hyper.mt[[j.for.mt]][[i.mt]]), 
                           fn = function(the) likeli.mt(x0.mt[[j.for.mt]],x0.mt[[j.for.mt]],y0.tuck.mt[[j.for.mt]],n.mt,dim.h,the,B.tuck)$like, 
                           gr = function(the) unlist(der.l(x0.mt[[j.for.mt]],x0.mt[[j.for.mt]],y0.tuck.mt[[j.for.mt]],n.mt,the,B.tuck)), 
                           method = "L-BFGS-B", lower=lower.th, upper=upper.th)$par
    }else{
      hyper.mt.ucb = hyper.mt.ucb
    }
    
    like.re.mt[[j.for.mt]][[i.mt+1]] = likeli.mt(x0.mt[[j.for.mt]],x0.mt[[j.for.mt]],y0.tuck.mt[[j.for.mt]],
                                                 n.mt,dim.h,hyper.mt.ucb,B.tuck)
    hyper.mt[[j.for.mt]][[i.mt+1]] = like.re.mt[[j.for.mt]][[i.mt+1]]$the0
    print(i.mt)
  }
  
  mtgp.ucb <- function(x.new,ind.new) mtgp.hat(x.new,ind.new,x0.mt[[j.for.mt]],y0.tuck.mt[[j.for.mt]],
                                               n.mt,n.test=1,dim.h,hyper.mt[[j.for.mt]][[i.mt+1]],B.tuck) 
  mtgp.bo[[j.for.mt]] = mtgp.ucb
  
  h.mt[[j.for.mt]] = apply(y0.mt[[j.for.mt]],3,h)
  regret.mt[[j.for.mt]] = h.star-h.mt[[j.for.mt]]

  layout(matrix(1, nrow = 1, ncol = 1))
  plot(rep(h.star,(n.mt-n+1)),type="b",lwd=3,lty=1,pch=1,col=1,ylim=c(cummax(h.mt[[j.for.mt]])[n],h.star))
  lines(cummax(h.mt[[j.for.mt]])[n:n.mt],type="b",lwd=3,lty=2,pch=2,col=2)
  
  ins.regret.mt[[j.for.mt]] = h.star-cummax(h.mt[[j.for.mt]][(n+1):n.mt])
  cum.regret.mt[[j.for.mt]] = cumsum(ins.regret.mt[[j.for.mt]])
  
  plot(cumsum(ins.regret.mt[[j.for.mt]]),type="b",lwd=3,lty=2,pch=2,col=2)
  print(j.for.mt)
}

mat <- do.call(rbind, cum.regret.mt)
cum.regret.mean <- apply(mat,2, mean)
cum.regret.lower <- apply(mat,2, function(x) quantile(x, 0.025))
cum.regret.upper <- apply(mat,2, function(x) quantile(x, 0.975))


time <- 1:m
plot(time, cum.regret.mean, type = "l", lwd = 2, col = "blue", ylim = range(c(cum.regret.lower, cum.regret.upper)),
     ylab = "Value", xlab = "Time", main = "Mean with 95% Confidence Band")
polygon(c(time, rev(time)),
        c(cum.regret.upper, rev(cum.regret.lower)),
        col = rgb(0.1, 0.2, 0.9, 0.2), border = NA)
lines(time, cum.regret.mean, col = "blue", lwd = 2)



fmt.ucb.list = list(ind.x.star=ind.x.star, h.star=h.star, ind.star=ind.star, ora.x.star=ora.x.star,
                    like.re.mt=like.re.mt, hyper.mt=hyper.mt, fhat=fhat,
                    x0.mt=x0.mt, y0.mt=y0.mt, mtgp.bo=mtgp.bo, ind.x.mt=ind.x.mt,
                    h.mt=h.mt, regret.mt=regret.mt, ins.regret.mt=ins.regret.mt, cum.regret.mt=cum.regret.mt,
                    beta.mt=beta.mt, ucb.new.mt=ucb.new.mt)
save(fmt.ucb.list, file="c4.fmt.ucb.list.RData")






